Install and load packages

Load required packages for the specific analysis.

library(tidycensus) # Access census data
library(tidyverse)
library(sf)         # Working with simple features - geospatial features
library(tmap)
library(lubridate)  
library(janitor)    # To clean names of variables in the data
library(here)       # A package I haven't taught you about before that doesn't do much, but ....

Going to need an API key to access the data. We can acquire a key from the following website: http://api.census.gov/data/key_signup.html

census_api_key("ef8a7b63162e3c4881110f495943872be80639fd", overwrite=TRUE)
To install your API key for use in future sessions, run this function with `install = TRUE`.

Required Census codes for demographic variables. Selection is based on the most popular demographic values: Age, Gender, Ethnicity, Income

cens_vars <- load_variables(2018, "acs5", cache = TRUE)

Getting together the 2018 DC Census Data

df_census <- get_acs(geography = "tract",                  
                  variables=c("median_inc"="B06011_001",
                              "pop_tot"="B01001_001",
                              "pop_white"="B01001H_001",
                              "pop_black"="B02009_001",
                              "age"="B01002_001",
                              "male"="B01001_002",
                              "female"="B01001_026"),
                  state="DC",geometry=TRUE,year=2018) %>%
  clean_names()
Getting data from the 2014-2018 5-year ACS
Downloading feature geometry from the Census website.  To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
Using FIPS code '11' for state 'DC'

Now we should classify and plot the data frame we just created

class(df_census)
[1] "sf"         "data.frame"
plot(df_census)

Now we should clean the census data frame and add percentages for population and sex variables.

df_cens=df_census %>% select(-moe) %>% spread(variable,estimate) %>%
  mutate(pop_black_pct = pop_black/pop_tot, 
         pop_white_pct=pop_white/pop_tot, 
         male_pct=male/pop_tot,
         female_pct=female/pop_tot)

Visualize demographic outcome in maps.

# Visualize median income distribution in Washington D.C.
tm_shape(df_cens) +tm_polygons("median_inc",alpha=.5)
# Visualize age distribution in Washington D.C.
tm_shape(df_cens) +tm_polygons("age",alpha=.5)
# Visualize population distribution in Washington D.C.
tm_shape(df_cens) +tm_polygons("pop_tot",alpha=.5)
# Visualize black population distribution in Washington D.C.
tm_shape(df_cens) +tm_polygons("pop_black",alpha=.5)
# Visualize white population distribution in Washington D.C.
tm_shape(df_cens) +tm_polygons("pop_white",alpha=.5)
# Visualize male population distribution in Washington D.C.
tm_shape(df_cens) +tm_polygons("male",alpha=.5)
# Visualize female distribution in Washington D.C.
tm_shape(df_cens) +tm_polygons("female",alpha=.5)

Now we need to read in and classify the DC neighborhood data

neigh=st_read(here("data", "raw", "DC_Health_Planning_Neighborhoods.geojson")) %>% clean_names()
Reading layer `DC_Health_Planning_Neighborhoods' from data source 
  `C:\Users\Tarak\Documents\CollegeStuff\SeniorYear\Fall2021\DS241\BikeProject\data\raw\DC_Health_Planning_Neighborhoods.geojson' 
  using driver `GeoJSON'
Simple feature collection with 51 features and 8 fields
Geometry type: POLYGON
Dimension:     XY
Bounding box:  xmin: -77.11976 ymin: 38.79165 xmax: -76.9094 ymax: 38.99556
Geodetic CRS:  WGS 84
class(neigh)
[1] "sf"         "data.frame"

Now reading in the COVID 19 DC data

df_c=readxl::read_excel(here("data","raw","neigh_cases.xlsx"),
                            col_types = c("date", "text", "numeric")) %>% 
  clean_names()

# We should filter the data so we can better analyze it
df_cases=df_c %>%
  filter(as_date(date) == "2021-11-17") %>% 
  separate(neighborhood,into=c("code","name"),sep = ":") %>%
  mutate(code=case_when(
    code=="N35" ~"N0",
    TRUE ~ code
  ))

Now we progress to joining these data frames to accompish the project task

# Need to join neighborhood data with covid case data
neigh2=left_join(neigh,df_cases,by=c("code")) 
tmap_mode("view")
tmap mode set to interactive viewing
# Create map of total positive covid cases in DC
tm_shape(neigh2) +tm_polygons("total_positives", alpha=.5)
  tm_shape(neigh2) +tm_borders(col="blue",lwd=5,alpha=.2)+
  tm_shape(df_cens) +tm_borders(col="red",lwd=1,alpha=.3)
#df_j=st_join(df_cens,neigh2,prepared=FALSE)
df_cens_adj=df_cens %>% st_transform(4326)
df_j=st_join(df_cens_adj,neigh2,largest=TRUE)
Warning: attribute variables are assumed to be spatially constant throughout all geometries
df_final=df_j %>% select(median_inc, pop_tot, pop_black, pop_white, age, male, female, total_positives, objectid) %>%
  group_by(objectid) %>%
  summarise(pop_n=sum(pop_tot),
            pop_black_n=sum(pop_black),
            pop_white_n=sum(pop_white),
            male_n=sum(male),
            female_n=sum(female),
            age_n=sum(age)/n(),
            adj_median_income=sum(pop_tot*median_inc)/pop_n,
            tot_positives_n = sum(total_positives)/n())
            
plot(df_final)

tm_shape(df_final)+tm_polygons(c("adj_median_income","tot_positives_n", "pop_black_n", "pop_white_n"))
LS0tDQp0aXRsZTogIkRlbW9ncmFwaGljIERDIHRpZHljZW5zdXMgZGF0YSBhbmFseXNpcyINCmF1dGhvcjogIlRhcmFrIFBhdGVsIg0Kb3V0cHV0Og0KICBodG1sX2RvY3VtZW50Og0KICAgIGRmX3ByaW50OiBwYWdlZA0KLS0tDQojIyBJbnN0YWxsIGFuZCBsb2FkIHBhY2thZ2VzDQpMb2FkIHJlcXVpcmVkIHBhY2thZ2VzIGZvciB0aGUgc3BlY2lmaWMgYW5hbHlzaXMuDQoNCmBgYHtyfQ0KbGlicmFyeSh0aWR5Y2Vuc3VzKSAjIEFjY2VzcyBjZW5zdXMgZGF0YQ0KbGlicmFyeSh0aWR5dmVyc2UpDQpsaWJyYXJ5KHNmKSAgICAgICAgICMgV29ya2luZyB3aXRoIHNpbXBsZSBmZWF0dXJlcyAtIGdlb3NwYXRpYWwgZmVhdHVyZXMNCmxpYnJhcnkodG1hcCkNCmxpYnJhcnkobHVicmlkYXRlKSAgDQpsaWJyYXJ5KGphbml0b3IpICAgICMgVG8gY2xlYW4gbmFtZXMgb2YgdmFyaWFibGVzIGluIHRoZSBkYXRhDQpsaWJyYXJ5KGhlcmUpICAgICAgICMgQSBwYWNrYWdlIEkgaGF2ZW4ndCB0YXVnaHQgeW91IGFib3V0IGJlZm9yZSB0aGF0IGRvZXNuJ3QgZG8gbXVjaCwgYnV0IC4uLi4NCmBgYA0KDQpHb2luZyB0byBuZWVkIGFuIEFQSSBrZXkgdG8gYWNjZXNzIHRoZSBkYXRhLiBXZSBjYW4gYWNxdWlyZSBhIGtleSBmcm9tIHRoZSBmb2xsb3dpbmcgd2Vic2l0ZTogaHR0cDovL2FwaS5jZW5zdXMuZ292L2RhdGEva2V5X3NpZ251cC5odG1sIA0KDQpgYGB7cn0NCmNlbnN1c19hcGlfa2V5KCJlZjhhN2I2MzE2MmUzYzQ4ODExMTBmNDk1OTQzODcyYmU4MDYzOWZkIiwgb3ZlcndyaXRlPVRSVUUpDQpgYGANCg0KUmVxdWlyZWQgQ2Vuc3VzIGNvZGVzIGZvciBkZW1vZ3JhcGhpYyB2YXJpYWJsZXMuIFNlbGVjdGlvbiBpcyBiYXNlZCBvbiB0aGUgbW9zdCBwb3B1bGFyIGRlbW9ncmFwaGljIHZhbHVlczogQWdlLCBHZW5kZXIsIEV0aG5pY2l0eSwgSW5jb21lDQoNCi0gKipCMDEwMDJfMDAxKiogICAgREMgQWdlIERpc3RyaWJ1dGlvbg0KLSAqKkIwMTAwMV8wMDIqKiAgICBTZXggKE0pDQotICoqQjAxMDAxXzAyNioqICAgIFNleCAoRikNCi0gKipCMDEwMDFfMDAxKiogICAgVG90YWwgcG9wdWxhdGlvbg0KLSAqKkIwMTAwMUhfMDAxKiogICBQb3B1bGF0aW9uICh3aGl0ZSkNCi0gKipCMDIwMDlfMDAxKiogICAgUG9wdWxhdGlvbiAoYmxhY2spDQotICoqQjE5MDEzXzAwMSoqICAgIE1lZGlhbiBob3VzZWhvbGQgaW5jb21lIGRpc3RyaWJ1dGlvbiBpbiBELkMuDQoNCg0KYGBge3J9DQpjZW5zX3ZhcnMgPC0gbG9hZF92YXJpYWJsZXMoMjAxOCwgImFjczUiLCBjYWNoZSA9IFRSVUUpDQpgYGANCg0KIyMgR2V0dGluZyB0b2dldGhlciB0aGUgMjAxOCBEQyBDZW5zdXMgRGF0YQ0KDQpgYGB7cn0NCmRmX2NlbnN1cyA8LSBnZXRfYWNzKGdlb2dyYXBoeSA9ICJ0cmFjdCIsICAgICAgICAgICAgICAgICAgDQogICAgICAgICAgICAgICAgICB2YXJpYWJsZXM9YygibWVkaWFuX2luYyI9IkIwNjAxMV8wMDEiLA0KICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgInBvcF90b3QiPSJCMDEwMDFfMDAxIiwNCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICJwb3Bfd2hpdGUiPSJCMDEwMDFIXzAwMSIsDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAicG9wX2JsYWNrIj0iQjAyMDA5XzAwMSIsDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAiYWdlIj0iQjAxMDAyXzAwMSIsDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAibWFsZSI9IkIwMTAwMV8wMDIiLA0KICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgImZlbWFsZSI9IkIwMTAwMV8wMjYiKSwNCiAgICAgICAgICAgICAgICAgIHN0YXRlPSJEQyIsZ2VvbWV0cnk9VFJVRSx5ZWFyPTIwMTgpICU+JQ0KICBjbGVhbl9uYW1lcygpDQpgYGANCk5vdyB3ZSBzaG91bGQgY2xhc3NpZnkgYW5kIHBsb3QgdGhlIGRhdGEgZnJhbWUgd2UganVzdCBjcmVhdGVkDQoNCmBgYHtyfQ0KY2xhc3MoZGZfY2Vuc3VzKQ0KcGxvdChkZl9jZW5zdXMpDQpgYGANCg0KTm93IHdlIHNob3VsZCBjbGVhbiB0aGUgY2Vuc3VzIGRhdGEgZnJhbWUgYW5kIGFkZCBwZXJjZW50YWdlcyBmb3IgcG9wdWxhdGlvbiBhbmQgc2V4IHZhcmlhYmxlcy4NCg0KYGBge3J9DQpkZl9jZW5zPWRmX2NlbnN1cyAlPiUgc2VsZWN0KC1tb2UpICU+JSBzcHJlYWQodmFyaWFibGUsZXN0aW1hdGUpICU+JQ0KICBtdXRhdGUocG9wX2JsYWNrX3BjdCA9IHBvcF9ibGFjay9wb3BfdG90LCANCiAgICAgICAgIHBvcF93aGl0ZV9wY3Q9cG9wX3doaXRlL3BvcF90b3QsIA0KICAgICAgICAgbWFsZV9wY3Q9bWFsZS9wb3BfdG90LA0KICAgICAgICAgZmVtYWxlX3BjdD1mZW1hbGUvcG9wX3RvdCkNCmBgYA0KDQoNClZpc3VhbGl6ZSBkZW1vZ3JhcGhpYyBvdXRjb21lIGluIG1hcHMuDQpgYGB7cn0NCiMgVmlzdWFsaXplIG1lZGlhbiBpbmNvbWUgZGlzdHJpYnV0aW9uIGluIFdhc2hpbmd0b24gRC5DLg0KdG1fc2hhcGUoZGZfY2VucykgK3RtX3BvbHlnb25zKCJtZWRpYW5faW5jIixhbHBoYT0uNSkNCiMgVmlzdWFsaXplIGFnZSBkaXN0cmlidXRpb24gaW4gV2FzaGluZ3RvbiBELkMuDQp0bV9zaGFwZShkZl9jZW5zKSArdG1fcG9seWdvbnMoImFnZSIsYWxwaGE9LjUpDQojIFZpc3VhbGl6ZSBwb3B1bGF0aW9uIGRpc3RyaWJ1dGlvbiBpbiBXYXNoaW5ndG9uIEQuQy4NCnRtX3NoYXBlKGRmX2NlbnMpICt0bV9wb2x5Z29ucygicG9wX3RvdCIsYWxwaGE9LjUpDQojIFZpc3VhbGl6ZSBibGFjayBwb3B1bGF0aW9uIGRpc3RyaWJ1dGlvbiBpbiBXYXNoaW5ndG9uIEQuQy4NCnRtX3NoYXBlKGRmX2NlbnMpICt0bV9wb2x5Z29ucygicG9wX2JsYWNrIixhbHBoYT0uNSkNCiMgVmlzdWFsaXplIHdoaXRlIHBvcHVsYXRpb24gZGlzdHJpYnV0aW9uIGluIFdhc2hpbmd0b24gRC5DLg0KdG1fc2hhcGUoZGZfY2VucykgK3RtX3BvbHlnb25zKCJwb3Bfd2hpdGUiLGFscGhhPS41KQ0KIyBWaXN1YWxpemUgbWFsZSBwb3B1bGF0aW9uIGRpc3RyaWJ1dGlvbiBpbiBXYXNoaW5ndG9uIEQuQy4NCnRtX3NoYXBlKGRmX2NlbnMpICt0bV9wb2x5Z29ucygibWFsZSIsYWxwaGE9LjUpDQojIFZpc3VhbGl6ZSBmZW1hbGUgZGlzdHJpYnV0aW9uIGluIFdhc2hpbmd0b24gRC5DLg0KdG1fc2hhcGUoZGZfY2VucykgK3RtX3BvbHlnb25zKCJmZW1hbGUiLGFscGhhPS41KQ0KYGBgDQoNCiMgTm93IHdlIG5lZWQgdG8gcmVhZCBpbiBhbmQgY2xhc3NpZnkgdGhlIERDIG5laWdoYm9yaG9vZCBkYXRhDQoNCmBgYHtyfQ0KbmVpZ2g9c3RfcmVhZChoZXJlKCJkYXRhIiwgInJhdyIsICJEQ19IZWFsdGhfUGxhbm5pbmdfTmVpZ2hib3Job29kcy5nZW9qc29uIikpICU+JSBjbGVhbl9uYW1lcygpDQpjbGFzcyhuZWlnaCkNCmBgYA0KIyBOb3cgcmVhZGluZyBpbiB0aGUgQ09WSUQgMTkgREMgZGF0YQ0KDQpgYGB7ciBtZXNzYWdlPUZBTFNFLCB3YXJuaW5nPUZBTFNFfQ0KZGZfYz1yZWFkeGw6OnJlYWRfZXhjZWwoaGVyZSgiZGF0YSIsInJhdyIsIm5laWdoX2Nhc2VzLnhsc3giKSwNCiAgICAgICAgICAgICAgICAgICAgICAgICAgICBjb2xfdHlwZXMgPSBjKCJkYXRlIiwgInRleHQiLCAibnVtZXJpYyIpKSAlPiUgDQogIGNsZWFuX25hbWVzKCkNCg0KIyBXZSBzaG91bGQgZmlsdGVyIHRoZSBkYXRhIHNvIHdlIGNhbiBiZXR0ZXIgYW5hbHl6ZSBpdA0KZGZfY2FzZXM9ZGZfYyAlPiUNCiAgZmlsdGVyKGFzX2RhdGUoZGF0ZSkgPT0gIjIwMjEtMTEtMTciKSAlPiUgDQogIHNlcGFyYXRlKG5laWdoYm9yaG9vZCxpbnRvPWMoImNvZGUiLCJuYW1lIiksc2VwID0gIjoiKSAlPiUNCiAgbXV0YXRlKGNvZGU9Y2FzZV93aGVuKA0KICAgIGNvZGU9PSJOMzUiIH4iTjAiLA0KICAgIFRSVUUgfiBjb2RlDQogICkpDQpgYGANCg0KIyMgTm93IHdlIHByb2dyZXNzIHRvIGpvaW5pbmcgdGhlc2UgZGF0YSBmcmFtZXMgdG8gYWNjb21waXNoIHRoZSBwcm9qZWN0IHRhc2sNCg0KYGBge3J9DQojIE5lZWQgdG8gam9pbiBuZWlnaGJvcmhvb2QgZGF0YSB3aXRoIGNvdmlkIGNhc2UgZGF0YQ0KbmVpZ2gyPWxlZnRfam9pbihuZWlnaCxkZl9jYXNlcyxieT1jKCJjb2RlIikpIA0KdG1hcF9tb2RlKCJ2aWV3IikNCiMgQ3JlYXRlIG1hcCBvZiB0b3RhbCBwb3NpdGl2ZSBjb3ZpZCBjYXNlcyBpbiBEQw0KdG1fc2hhcGUobmVpZ2gyKSArdG1fcG9seWdvbnMoInRvdGFsX3Bvc2l0aXZlcyIsIGFscGhhPS41KQ0KYGBgDQpgYGB7cn0NCiAgdG1fc2hhcGUobmVpZ2gyKSArdG1fYm9yZGVycyhjb2w9ImJsdWUiLGx3ZD01LGFscGhhPS4yKSsNCiAgdG1fc2hhcGUoZGZfY2VucykgK3RtX2JvcmRlcnMoY29sPSJyZWQiLGx3ZD0xLGFscGhhPS4zKQ0KYGBgDQoNCmBgYHtyfQ0KI2RmX2o9c3Rfam9pbihkZl9jZW5zLG5laWdoMixwcmVwYXJlZD1GQUxTRSkNCmBgYA0KDQpgYGB7cn0NCmRmX2NlbnNfYWRqPWRmX2NlbnMgJT4lIHN0X3RyYW5zZm9ybSg0MzI2KQ0KYGBgDQoNCmBgYHtyfQ0KZGZfaj1zdF9qb2luKGRmX2NlbnNfYWRqLG5laWdoMixsYXJnZXN0PVRSVUUpDQpgYGANCg0KYGBge3J9DQpkZl9maW5hbD1kZl9qICU+JSBzZWxlY3QobWVkaWFuX2luYywgcG9wX3RvdCwgcG9wX2JsYWNrLCBwb3Bfd2hpdGUsIGFnZSwgbWFsZSwgZmVtYWxlLCB0b3RhbF9wb3NpdGl2ZXMsIG9iamVjdGlkKSAlPiUNCiAgZ3JvdXBfYnkob2JqZWN0aWQpICU+JQ0KICBzdW1tYXJpc2UocG9wX249c3VtKHBvcF90b3QpLA0KICAgICAgICAgICAgcG9wX2JsYWNrX249c3VtKHBvcF9ibGFjayksDQogICAgICAgICAgICBwb3Bfd2hpdGVfbj1zdW0ocG9wX3doaXRlKSwNCiAgICAgICAgICAgIG1hbGVfbj1zdW0obWFsZSksDQogICAgICAgICAgICBmZW1hbGVfbj1zdW0oZmVtYWxlKSwNCiAgICAgICAgICAgIGFnZV9uPXN1bShhZ2UpL24oKSwNCiAgICAgICAgICAgIGFkal9tZWRpYW5faW5jb21lPXN1bShwb3BfdG90Km1lZGlhbl9pbmMpL3BvcF9uLA0KICAgICAgICAgICAgdG90X3Bvc2l0aXZlc19uID0gc3VtKHRvdGFsX3Bvc2l0aXZlcykvbigpKQ0KICAgICAgICAgICAgDQpwbG90KGRmX2ZpbmFsKQ0KdG1fc2hhcGUoZGZfZmluYWwpK3RtX3BvbHlnb25zKGMoImFkal9tZWRpYW5faW5jb21lIiwidG90X3Bvc2l0aXZlc19uIiwgInBvcF9ibGFja19uIiwgInBvcF93aGl0ZV9uIikpDQpgYGANCg==